Load libraries necessary to run the script.
library(tidyverse) # general
## Warning: package 'dplyr' was built under R version 4.2.3
## Warning: package 'stringr' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggdist) # plot distributions
library(ggside) # plot distributions on the plot margins
## Registered S3 method overwritten by 'ggside':
## method from
## +.gg ggplot2
library(easystats) # estimate and process statistical models
## # Attaching packages: easystats 0.6.0 (red = needs update)
## ✖ bayestestR 0.13.1 ✔ correlation 0.8.4
## ✖ datawizard 0.7.1 ✖ effectsize 0.8.3
## ✖ insight 0.19.1.6 ✖ modelbased 0.8.6
## ✖ performance 0.10.3 ✖ parameters 0.21.0
## ✖ report 0.5.7 ✖ see 0.7.5
##
## Restart the R-Session and update packages in red with `easystats::easystats_update()`.
library(patchwork) # combine multiple plots
library(modelsummary) # for tables
## Warning: package 'modelsummary' was built under R version 4.2.3
## Warning in !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output
## %in% : 'length(x) = 2 > 1' in coercion to 'logical(1)'
##
## Attaching package: 'modelsummary'
##
## The following object is masked from 'package:parameters':
##
## supported_models
##
## The following object is masked from 'package:insight':
##
## supported_models
options("modelsummary_format_numeric_latex" = "plain")
options(modelsummary_get = "broom")
library(see) # for plotting
library(ggraph) # needs to be loaded
library(ggdist) # plotting distributions
library(kableExtra) # export tables to latex
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
library(correlation) # easy correlation
df <- read.csv("data/data_pruned_for_analysis_2024-04-26.csv")%>%
mutate(
Typology = fct_relevel(video_primary_category, "urban", "green"),
Valence = datawizard::rescale(Valence, to = c(1, 7), range = c(1, 9)),
Arousal = datawizard::rescale(Arousal, to = c(1, 7), range = c(1, 9))
) %>%
rename("Participant" = Anonymised_ID )
length(unique(df$Participant))
## [1] 377
video_summary <- df %>%
group_by(video_name_df) %>%
tally() %>%
arrange(n) %>%
summarise(m = mean(n), s = sd(n), r = paste( range(n), collapse = " - "))
Each video was seen 23.16 times on average (SD = 1.876, range = 19 - 28).
dfenv <- df %>%
group_by(video_name_df_keep, Typology) %>%
summarise_at(.vars = c( "Restoration", "WillingnessToWalk","Presence", "Width", "Structure", "Beauty","Interest", "Scenic", "Familiarity", "Valence","Arousal", "Crowdedness" ), .funs = function(x) mean(x, na.rm = T)) %>%
ungroup() %>%
mutate(Typology = recode_factor(Typology, "urban" = "Urban", "green" = "Green" )) %>%
rename(`Willingness to walk` = WillingnessToWalk)
Here we only provide an overview to confirm we have loaded the correct amount of participants – as a sanity check. Participant characteristics are analysed and reported by a different file (Table 1).
dfsub <- df |>
group_by(Participant) |>
select(Participant, Age, Sex, Language, Crowd_Preference_Mean, starts_with("ipip"), SSA_Mean, SPS_Total_Mean, SIAS_Total_Mean, NSS,
Upbringing_Urban, Current_Urban) |>
slice(1)
nrow(dfsub) # should be 377
## [1] 377
A total of 377 participants (Mean age = 30.8, SD = 10.0, range: [18, 67]; Sex: 49.3% females, 50.7% males, 0.0% other) participants were included for analysis.
(dfsub |>
ggplot(aes(x = Age)) +
geom_density(fill = "orange") +
geom_vline(xintercept = mean(dfsub$Age), color = "red", linetype = "dashed") +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
theme_modern()) |
(dfsub |>
ggplot(aes(x = Sex, fill = Sex)) +
geom_bar(stat = "count") +
scale_y_continuous(expand = c(0, 0)) +
scale_fill_see_d(guide = "none") +
theme_modern())
p_corrplot_u <- dfenv %>%
filter(Typology == "Urban") %>%
select(WLW = `Willingness to walk`, Crowdedness, Valence, Arousal, Restoration, Presence, Beauty, Interest, Structure, Width) %>%
correlation(., bayesian = T) %>%
summary(redundant = FALSE) %>%
plot() + scale_x_discrete(position = "top", expand = c(0, 0)) + labs(title = "Urban spaces") + scale_y_discrete(expand = c(0, 0)) +
theme_modern() + theme(legend.position = "none")
## Warning in genhypergeo_series_pos(U = c((n - 1)/2, (n - 1)/2), L = ((n + :
## Series not converged.
## Warning in genhypergeo_series_pos(U = c((n - 1)/2, (n - 1)/2), L = ((n + :
## Series not converged.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
p_corrplot_g <- dfenv %>%
filter(Typology == "Green") %>%
select(WLW = `Willingness to walk`, Crowdedness, Valence, Arousal, Restoration, Presence, Beauty, Interest, Structure, Width) %>%
correlation(., bayesian = T) %>%
summary(redundant = FALSE) %>%
plot() +
scale_x_discrete(position = "top", expand = c(0, 0)) + labs(title = "Green spaces") +
scale_y_discrete(expand = c(0, 0)) +
theme_modern() +
theme(legend.position = "bottom")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
p_corr <- p_corrplot_u / p_corrplot_g + plot_annotation(tag_levels = "A", title = "Correlation Matrix of emotions and environmental appraisals", subtitle = "Self-reported measures", caption = "Note: WLW = willingness to walk.")
p_corr
ggsave(p_corr, filename = "output/plots_corr.jpg", width = 13, height = 10, scale = 1, dpi = 300)
Calculates bayesian t-test and reports BF, to be used when making tables of descriptives.
custom_ttest <- function(x, ...) {
# Construct vectors of data y, and groups (strata) g
y <- unlist(x)
g <- factor(rep(1:length(x), times=sapply(x, length)))
if (is.numeric(y)) {
# For numeric variables, perform a standard 2-sample t-test
# p <- t.test(y ~ g)$p.value
p <- BayesFactor::ttestBF(x=y, y= g)
p <- p@bayesFactor$bf
# print(p)
} else {
# For categorical variables, perform a chi-squared test of independence
p <- chisq.test(table(y, g))$p.value
}
# Format the p-value, using an HTML entity for the less-than sign.
# The initial empty string places the output on the line below the variable label.
c("", sub("<", "<", format.pval(p, digits=3, eps=0.001)))
}
count_missing <- function(x) {
sum(is.na(x))
}
order_names <- c( "WillingnessToWalk",
"Arousal",
"Valence",
"Restoration",
"Presence" ,
"Beauty",
"Crowdedness",
"Width",
"Structure",
"Scenic",
"Interest",
"Familiarity")
ttest_bf <- df %>%
select(video_name, Typology, Restoration:Arousal) %>%
pivot_longer(cols = 3:14, values_transform = as.numeric) %>%
filter(!is.na(value)) %>%
group_by(Typology, name) %>%
nest() %>%
spread(key = Typology, value = data) %>%
mutate(
t_test = map2(urban, green, ~{BayesFactor::ttestBF(.x$value, .y$value)@bayesFactor}[,-3]),
urban = map(urban, nrow),
green = map(green, nrow)
) %>%
unnest(cols = c(urban, green, t_test)) %>%
arrange(factor(name, levels =order_names)) %>%
ungroup() %>%
select(`BF*` = bf)
## t is large; approximation invoked.
## t is large; approximation invoked.
## t is large; approximation invoked.
## t is large; approximation invoked.
## t is large; approximation invoked.
## t is large; approximation invoked.
## t is large; approximation invoked.
## t is large; approximation invoked.
## t is large; approximation invoked.
## t is large; approximation invoked.
length(ttest_bf)
## [1] 1
length(3:14)
## [1] 12
stimuli = df %>% group_by(video_name) %>% slice(1)%>% ungroup() %>% group_by(Typology) %>%count()
df %>%
select( Typology, order_names) %>%
mutate(Typology = recode(Typology, "urban" = "Urban (397 stimuli)", "green"= "Green (83 stimuli)")) %>%
as.data.frame() %>%
datasummary(formula = All(.) ~ Typology * ( (`Unique (\\#)` = N) + Mean + SD ), # (`Missing (%)` = PercentMissing) +
add_columns = ttest_bf,
title = "Descriptive statistics of emotional reactions and environmental appraisals of the stimuli, grouped by typology. \\label{tab:responseDescriptives}",
notes = '(*) Bayes Factor of a bayesian t-test',
output = 'output/table_response_descriptives.tex')
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(order_names)
##
## # Now:
## data %>% select(all_of(order_names))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
penv <- dfenv %>%
pivot_longer(Restoration : Crowdedness) %>%
ggplot(aes(x = name, y = value, colour = Typology)) +
# geom_boxplot( position = position_dodge()) +
# geom_line(aes(group = name_df_keep), alpha =.2)+
geom_jitter(position = position_jitterdodge(), alpha = 0.5) +
ggdist::stat_halfeye(position = position_dodge(), aes(fill = Typology),
## custom bandwidth
adjust = .5,
## adjust height
width = .6,
## move geom to the right
justification = -.4,
## remove slab interval
.width = 0,
point_colour = NA) +
scale_color_manual(values = c("Urban" = col_urban, "Green" = col_green)) +
scale_fill_manual(values = c("Urban" = col_urban, "Green" = col_green)) +
coord_flip() +
scale_y_continuous(breaks = 1:7) +
geom_hline(yintercept = 4, linetype = "dashed") +
# scale_y_discrete(position = "top") +
theme_modern() +
theme(legend.position = "bottom") +
labs(title="Average rating per stimulus", y = "Rating", x = "Dimension")
penv